## Overview

This document provides an overview of the population clusters derived from the High Resolution Settlement Layer (HRSL, https://www.ciesin.columbia.edu/data/hrsl/) and OpenStreetMap (OSM, https://www.openstreetmap.org/data) for the case of Nigeria. The data processing and analysis are done using R. This document provides transparent documentation of the processes and results.

For the analysis, the following packages are required:

## Warning: package 'raster' was built under R version 3.5.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.5.3
## Warning: package 'sf' was built under R version 3.5.3
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
## Warning: package 'rgdal' was built under R version 3.5.3
## rgdal: version: 1.4-3, (SVN revision 828)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/catherina.cader/offline/R/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/catherina.cader/offline/R/rgdal/proj
##  Linking to sp version: 1.3-1
## Warning: package 'rgeos' was built under R version 3.5.3
## rgeos version: 0.4-2, (SVN revision 581)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 
##  Linking to sp version: 1.3-1 
##  Polygon checking: TRUE
## Warning: package 'dbscan' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:rgeos':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'mapview' was built under R version 3.5.3
## Warning: package 'tmap' was built under R version 3.5.3
## Warning: package 'geosphere' was built under R version 3.5.3
## Warning: package 'leaflet' was built under R version 3.5.3

Input data

Three data sets are used:

* HRSL
* OSM
* Admin boundaries

#read in spatial data

# Folder on the project share that holds the HRSL rasters for Nigeria.
datapath <- "\\\\srv02\\RL-Institut\\04_Projekte\\240_NESP2\\03-Projektinhalte\\02_Data\\02_Data_from_online_sources\\05_Population_data\\hrsl_nga_v1"

# HRSL settlement layer, loaded as a raster object.
builtup <- raster(file.path(datapath, "hrsl_nga_settlement.tif"))

# Level-1 administrative boundaries, read as a simple feature collection.
adm1 <- st_read(
  dsn = "\\\\srv02\\RL-Institut\\04_Projekte\\240_NESP2\\03-Projektinhalte\\02_Data\\02_Data_from_online_sources\\04_Administrative_Boundaries\\nga_admbnda_adm1_osgof"
)
## Reading layer `nga_admbnda_adm1_osgof_20161215' from data source `\\srv02\RL-Institut\04_Projekte\240_NESP2\03-Projektinhalte\02_Data\02_Data_from_online_sources\04_Administrative_Boundaries\nga_admbnda_adm1_osgof' using driver `ESRI Shapefile'
## Simple feature collection with 37 features and 12 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 2.668534 ymin: 4.273007 xmax: 14.67882 ymax: 13.89442
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
# Map of all level-1 administrative units, titled "Nigeria".
adm1 %>%
  select(admin1Name) %>%
  ggplot() +
  geom_sf() +
  labs(title = "Nigeria")

# Map of the single unit whose admin1Name is "Kano".
adm1 %>%
  filter(admin1Name == "Kano") %>%
  ggplot() +
  geom_sf() +
  labs(title = "Kano")

# Empty list; presumably filled per state in a processing step not shown here.
state_list <- list()

Data processing

Extract the population raster data for each state

Clustering

The DBSCAN algorithm is used to cluster the points based on the distance to each other and a minimum number of points per cluster.

# Run DBSCAN on the input points. A cluster needs at least 3 points within
# the eps radius. NOTE(review): eps = 0.00083 is presumably in decimal
# degrees, since x/y are interpreted as EPSG:4326 below -- confirm.
test <- dbscan(input, eps = 0.00083, minPts = 3, weights = NULL)

# Attach the cluster id of every point to the input table.
input$cluster <- test$cluster

# Split the original data in two, according to whether DBSCAN assigned the
# point to a cluster (id != 0) or labelled it as noise (id == 0).
groups <- filter(input, cluster != 0)
noise <- filter(input, cluster == 0)

# Convert the points to an sf object (WGS84), merge the points of each
# cluster id into one geometry and wrap each one in its convex hull.
# The noise group (id 0) is still included at this stage.
out <- st_convex_hull(
  summarise(
    group_by(
      st_as_sf(input, coords = c("x", "y"), crs = 4326),
      cluster
    )
  )
)

# Show the hull built from the noise points.
filter(out, cluster == 0)
## Simple feature collection with 1 feature and 1 field
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 7.688408 ymin: 10.54979 xmax: 9.363963 ymax: 12.59645
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## # A tibble: 1 x 2
##   cluster                                                          geometry
##     <int>                                                    <POLYGON [°]>
## 1       0 ((8.74563 10.54979, 8.736185 10.55062, 8.716463 10.55367, 8.5495~
# Drop the hull built from the noise points (cluster id 0) and keep only
# the hulls of real clusters.
outfiltered <- filter(out, cluster != 0)

# Merge all remaining cluster hulls into a single geometry set.
# The object name `union` is kept because later code refers to it.
union <- st_union(x = outfiltered)

Visualization

The resulting clusters are shown in the map below.

# KEDCO grid line shapefile, read as a simple feature collection.
powerlines <- st_read(
  dsn = "\\\\srv02\\RL-Institut\\04_Projekte\\240_NESP2\\03-Projektinhalte\\02_Data\\02_Data_from_online_sources\\06_power_infrastructure\\columbia_grid_tracking\\kedco-grid-data-2016-shapefiles\\KEDCO_Grid_Data_Simplified_06172015_with_overlaps_removed.shp"
)
## Reading layer `KEDCO_Grid_Data_Simplified_06172015_with_overlaps_removed' from data source `\\srv02\RL-Institut\04_Projekte\240_NESP2\03-Projektinhalte\02_Data\02_Data_from_online_sources\06_power_infrastructure\columbia_grid_tracking\kedco-grid-data-2016-shapefiles\KEDCO_Grid_Data_Simplified_06172015_with_overlaps_removed.shp' using driver `ESRI Shapefile'
## Simple feature collection with 5022 features and 8 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: 6.988658 ymin: 9.23255 xmax: 12.45614 ymax: 13.32508
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
# Interactive map: grid lines overlaid with the merged cluster hulls in red.
# st_geometry() is applied to the sf object itself, so the call does not
# depend on the geometry column being named "geometry".
# `union` is passed as the complete sfc instead of `union[[1]]`: extracting
# the bare geometry with `[[` discards the CRS stored on the sfc, so the
# hull layer would reach mapview without projection information.
mapview(st_geometry(powerlines)) +
  mapview(union, color = "red", col.regions = "red")
# Transform the sfc object to polygons

#poly <- st_collection_extract(union, type = "POLYGON", warn = FALSE)
#mapview(poly, color= "red", col.regions = "red")+
 # mapview(st_geometry(powerlines$geometry))




#Calculate centroids of the polygons:

#poly$centroids <-
  #st_centroid( x = poly ) %>%
 # st_geometry()

Distance calculations

Between the polygons and grid infrastructure

#powerlines<-st_read("\\\\srv02\\RL-Institut\\04_Projekte\\240_NESP2\\03-Projektinhalte\\02_Data\\02_Data_from_online_sources\\06_power_infrastructure\\columbia_grid_tracking\\kedco-grid-data-2016-shapefiles\\KEDCO_Grid_Data_Simplified_06172015_with_overlaps_removed.shp")

# Calculate distances -----------------------------------------------------

#dist <- st_distance(centroids, st_cast(st_geometry(powerlines), "POINT"), by_element = TRUE)